Block 1: Initial Setup, Imports, Device, DataLoaders, and Transforms

In [2]:
# Block 1: Initial Setup, Imports, Device, DataLoaders, and Transforms
import os
import copy
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
# Use torchvision.transforms.v2 for modern augmentations
from torchvision import transforms, datasets, models
from torch.optim import SGD # Using SGD as in your previous setup
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import confusion_matrix, roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
import seaborn as sns
import json
import random # For random image selection
import requests # For URL input in Streamlit-like interface (though simplified here)
from io import BytesIO # For handling image data from URLs

# --- Global Configuration ---
# Seed every RNG used below so the index shuffle (and therefore the
# train/val/test split) is identical on every run.
torch.manual_seed(42)
np.random.seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False # Recommended for reproducibility

print("--- Block 1: Initial Setup and Data Loading ---")

# 1) GPU setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
if device.type == 'cuda':
    print(f"  CUDA device name: {torch.cuda.get_device_name(0)}")
    print(f"  CUDA memory allocated: {torch.cuda.memory_allocated(0) / (1024**2):.2f} MB")
    print(f"  CUDA memory cached: {torch.cuda.memory_reserved(0) / (1024**2):.2f} MB")

# 2) Data paths and hyper-parameters
DATA_ROOT = "./DFU/Patches" # Folder containing the 'Abnormal' and 'Normal' class sub-folders
IMG_SIZE = 224
BATCH_SIZE = 32

# DataLoader optimization parameters.
# os.cpu_count() can return None on some platforms; fall back before the
# integer division so this line cannot raise a TypeError.
NUM_WORKERS = (os.cpu_count() or 2) // 2 or 1 # Experiment with this for optimal CPU/GPU balance
PIN_MEMORY = True # Speeds up host-to-GPU transfer of batches

print(f"DataLoader will use {NUM_WORKERS} workers and pin_memory={PIN_MEMORY}")

# --- Advanced Data Transforms for Training (with RandAugment) ---
# RandAugment is an automated augmentation policy (2 random ops, magnitude 9).
train_tfm = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)), # Resize before RandAugment (or use RandomResizedCrop)
    transforms.RandAugment(num_ops=2, magnitude=9),
    transforms.ToTensor(),
    # ImageNet normalization statistics, matching the pre-trained backbone.
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Transforms for validation and test (no augmentation, just resize and normalize)
val_test_tfm = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# --- Dataset Loading and Splitting ---
# This first instance is only used to read the class list and total size.
full_ds = datasets.ImageFolder(DATA_ROOT)
class_names = full_ds.classes
print("Classes:", class_names)
print(f"Initial dataset size: {len(full_ds)}")

# Two more views of the same directory with different transforms.
# ImageFolder enumerates classes and files in sorted order, so all three
# instances index the same image at the same position, which makes the
# shared index split below valid.
full_ds_train_transformed = datasets.ImageFolder(DATA_ROOT, transform=train_tfm)
full_ds_val_test_transformed = datasets.ImageFolder(DATA_ROOT, transform=val_test_tfm)

# Split: 70% train / 15% val / 15% test
n_total = len(full_ds)
n_train = int(0.7 * n_total)
n_val = int(0.15 * n_total)
n_test = n_total - n_train - n_val # remainder, so the three parts always sum to n_total

# Shuffle once with the seeded numpy RNG for a reproducible random split.
indices = list(range(n_total))
np.random.shuffle(indices)
train_indices, val_indices, test_indices = indices[:n_train], indices[n_train:n_train+n_val], indices[n_train+n_val:]

train_ds = torch.utils.data.Subset(full_ds_train_transformed, train_indices)
val_ds = torch.utils.data.Subset(full_ds_val_test_transformed, val_indices)
test_ds = torch.utils.data.Subset(full_ds_val_test_transformed, test_indices)

print(f"Sizes โ–ถ train: {len(train_ds)} | val: {len(val_ds)} | test: {len(test_ds)}")

# Create DataLoaders (only the training loader shuffles)
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False,
                         num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False,
                          num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)

print("--- Block 1 Complete ---")
--- Block 1: Initial Setup and Data Loading ---
Using device: cuda
  CUDA device name: NVIDIA GeForce RTX 3050 Ti Laptop GPU
  CUDA memory allocated: 0.00 MB
  CUDA memory cached: 0.00 MB
DataLoader will use 10 workers and pin_memory=True
Classes: ['Abnormal(Ulcer)', 'Normal(Healthy skin)']
Initial dataset size: 1055
Sizes โ–ถ train: 738 | val: 158 | test: 159
--- Block 1 Complete ---

Block 2: EfficientNetB0 Model, Loss, Optimizer, Scheduler

In [3]:
# Block 2: EfficientNetB0 Model, Loss, Optimizer, Scheduler
import torch.optim as optim
import torch.nn.functional as F

print("\n--- Block 2: EfficientNetB0 Model & Training Setup ---")

# Load pre-trained EfficientNetB0 model.
# Uses the newer weights API (torchvision >= 0.13).
model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)

# Freeze the convolutional feature extractor; only the classifier head trains.
for param in model.features.parameters():
    param.requires_grad = False

# Modify the classifier head for binary classification.
# EfficientNet's classifier is a Sequential block: (0) Dropout, (1) Linear.
num_ftrs = model.classifier[1].in_features # Input features of the last linear layer
model.classifier[1] = nn.Linear(num_ftrs, 1) # Single logit output for binary classification

# Move model to the configured device (GPU if available)
model = model.to(device)

# Loss Function: Binary Cross-Entropy with Logits (recommended for single-output binary classification)
criterion = nn.BCEWithLogitsLoss()

# Optimizer: Adam (a good default for fine-tuning a new head).
# Pass ONLY the trainable parameters -- the backbone is frozen above, and
# registering frozen parameters with the optimizer is wasted bookkeeping.
optimizer = optim.Adam((p for p in model.parameters() if p.requires_grad), lr=0.001)

# Learning Rate Scheduler: ReduceLROnPlateau.
# mode='max' because it will be stepped with validation ACCURACY in Block 3.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=10) # Adjust patience if needed

print("EfficientNetB0 Model, Criterion, Optimizer, and Scheduler defined.")
print(f"Model will train on: {next(model.parameters()).device}")

# Optional: Print model summary to verify architecture
# from torchsummary import summary
# try:
#     summary(model, (3, IMG_SIZE, IMG_SIZE))
# except Exception as e:
#     print(f"Could not print model summary (might need to install torchsummary or adjust input shape): {e}")

print("\n--- Block 2 Complete ---")
--- Block 2: EfficientNetB0 Model & Training Setup ---
EfficientNetB0 Model, Criterion, Optimizer, and Scheduler defined.
Model will train on: cuda:0

--- Block 2 Complete ---

Block 3: Training with Early Stopping & History Saving

In [ ]:
# Block 2: Class Imbalance Handling

print("\n--- Block 2: Class Imbalance Handling ---")

# Compute class weights from the ACTUAL training split. The previous
# hard-coded example counts ([95, 114]) did not match this dataset, so the
# weighting was arbitrary. ImageFolder exposes per-sample integer labels via
# .targets, indexed the same way as the dataset itself.
train_labels = [full_ds_train_transformed.targets[i] for i in train_indices]
n_pos = sum(train_labels)            # samples with label 1 (class_names[1])
n_neg = len(train_labels) - n_pos    # samples with label 0 (class_names[0])
# BCEWithLogitsLoss's pos_weight multiplies the positive-class (label 1) term;
# n_neg / n_pos is the standard choice to rebalance toward the minority class.
pos_weight = torch.tensor(n_neg / n_pos, dtype=torch.float).to(device)

# Weighted loss for binary classification (replaces the criterion from Block 2)
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

# Augmentation pipeline for the training split.
train_tfm = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
# Rebinding the name `train_tfm` alone has NO effect on the dataset built in
# Block 1 -- the new pipeline must be installed on the dataset object itself,
# otherwise training silently keeps using the old transforms.
full_ds_train_transformed.transform = train_tfm

print("--- Block 2: Class Imbalance Handling Complete ---")

# Block 3: Training with Early Stopping & History Saving
import copy
import json

print("\n--- Block 3: Training with Early Stopping ---")

best_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
patience = 20 # Epochs to wait for a validation-accuracy improvement
wait = 0

EPOCHS = 100 # Upper bound; early stopping usually ends training sooner.

# Per-epoch history for plotting and overfitting diagnostics (used by Block 4)
train_losses_history = []
train_accuracies_history = []
val_losses_history = []
val_accuracies_history = []

for epoch in range(1, EPOCHS + 1):
    model.train() # Set model to training mode
    train_loss = 0.0
    correct_train_predictions = 0
    total_train_samples = 0

    # Training loop
    for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch}/{EPOCHS} [Train]"):
        imgs = imgs.to(device)
        # BCEWithLogitsLoss expects float targets shaped like the logits: (N, 1)
        labels = labels.float().unsqueeze(1).to(device)

        optimizer.zero_grad()
        out = model(imgs)
        loss = criterion(out, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the SUM of per-sample losses so dividing by the dataset
        # size below yields a correctly weighted mean over uneven batches.
        train_loss += loss.item() * imgs.size(0)

        preds = (torch.sigmoid(out) >= 0.5).float() # 0.5 probability threshold
        correct_train_predictions += (preds == labels).sum().item()
        total_train_samples += labels.size(0)

    train_loss /= len(train_loader.dataset)
    train_acc = correct_train_predictions / total_train_samples

    # Validation loop
    model.eval() # Set model to evaluation mode
    val_loss = 0.0
    correct_val_predictions = 0
    total_val_samples = 0

    with torch.no_grad():
        for imgs, labels in tqdm(val_loader, desc=f"Epoch {epoch}/{EPOCHS} [Val]"):
            imgs = imgs.to(device)
            labels_device = labels.float().unsqueeze(1).to(device)

            logits = model(imgs)
            loss = criterion(logits, labels_device)
            val_loss += loss.item() * imgs.size(0)

            preds_val = (torch.sigmoid(logits) >= 0.5).float()
            correct_val_predictions += (preds_val == labels_device).sum().item()
            total_val_samples += labels_device.size(0)

    val_loss /= len(val_loader.dataset)
    val_acc = correct_val_predictions / total_val_samples

    # The scheduler was created with mode='max', so step it on accuracy.
    scheduler.step(val_acc)

    print(f"Epoch {epoch}: Train_Loss={train_loss:.4f}, Train_Acc={train_acc:.4f} | "
          f"Val_Loss={val_loss:.4f}, Val_Acc={val_acc:.4f}")

    # Store metrics for history
    train_losses_history.append(train_loss)
    train_accuracies_history.append(train_acc)
    val_losses_history.append(val_loss)
    val_accuracies_history.append(val_acc)

    # Early stopping: snapshot the weights from the best validation epoch.
    if val_acc > best_acc:
        best_acc = val_acc
        best_wts = copy.deepcopy(model.state_dict())
        wait = 0
        print(f"--- New best validation accuracy: {best_acc:.4f} (Model weights updated) ---")
    else:
        wait += 1
        if wait >= patience:
            print(f"Early stopping at epoch {epoch}. No improvement for {patience} epochs.")
            print(f"Best validation accuracy achieved: {best_acc:.4f}")
            break

# Restore the best weights and persist them. Create the output directory
# first -- torch.save does not create missing parent directories.
os.makedirs("outputs", exist_ok=True)
model.load_state_dict(best_wts)
torch.save(model.state_dict(), "outputs/efficientnet_dfu_best.pth") # Save with new name for EfficientNet
print("Saved best model โž” outputs/efficientnet_dfu_best.pth")

# Save training history for later analysis (consumed by Block 4)
history_data = {
    'train_losses': train_losses_history,
    'train_accuracies': train_accuracies_history,
    'val_losses': val_losses_history,
    'val_accuracies': val_accuracies_history,
    'best_val_acc': best_acc
}
with open('outputs/training_history_efficientnet.json', 'w') as f: # Save with new name
    json.dump(history_data, f)
print("Saved training history โž” outputs/training_history_efficientnet.json")
--- Block 2: Class Imbalance Handling ---
--- Block 2: Class Imbalance Handling Complete ---

--- Block 3: Training with Early Stopping ---
Epoch 1/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:07<00:00,  3.23it/s]
Epoch 1/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:07<00:00,  3.23it/s]
Epoch 1/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.15s/it]
Epoch 1/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.15s/it]
Epoch 1: Train_Loss=0.4479, Train_Acc=0.7317 | Val_Loss=0.3082, Val_Acc=0.9494
--- New best validation accuracy: 0.9494 (Model weights updated) ---
Epoch 2/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.75it/s]
Epoch 2/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.75it/s]
Epoch 2/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.11s/it]

Epoch 2: Train_Loss=0.2787, Train_Acc=0.9187 | Val_Loss=0.2191, Val_Acc=0.9810
--- New best validation accuracy: 0.9810 (Model weights updated) ---
Epoch 3/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.78it/s]
Epoch 3/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.78it/s]
Epoch 3/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.07s/it]
Epoch 3/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.07s/it]
Epoch 3: Train_Loss=0.2020, Train_Acc=0.9566 | Val_Loss=0.1720, Val_Acc=0.9810
Epoch 4/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.82it/s]
Epoch 4/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.82it/s]
Epoch 4/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.11s/it]
Epoch 4/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.11s/it]
Epoch 4: Train_Loss=0.1688, Train_Acc=0.9634 | Val_Loss=0.1416, Val_Acc=0.9810
Epoch 5/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.73it/s]
Epoch 5/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.73it/s]
Epoch 5/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.07s/it]
Epoch 5/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.07s/it]
Epoch 5: Train_Loss=0.1513, Train_Acc=0.9539 | Val_Loss=0.1288, Val_Acc=0.9810
Epoch 6/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.59it/s]
Epoch 6/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.59it/s]
Epoch 6/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:06<00:00,  1.23s/it]

Epoch 6: Train_Loss=0.1421, Train_Acc=0.9512 | Val_Loss=0.1223, Val_Acc=0.9873
--- New best validation accuracy: 0.9873 (Model weights updated) ---
Epoch 7/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.64it/s]
Epoch 7/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.64it/s]
Epoch 7/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.12s/it]
Epoch 7/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.12s/it]
Epoch 7: Train_Loss=0.1307, Train_Acc=0.9621 | Val_Loss=0.1110, Val_Acc=0.9810
Epoch 8/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.62it/s]
Epoch 8/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.62it/s]
Epoch 8/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.05s/it]
Epoch 8/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.05s/it]
Epoch 8: Train_Loss=0.1214, Train_Acc=0.9661 | Val_Loss=0.1105, Val_Acc=0.9810
Epoch 9/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.69it/s]
Epoch 9/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.69it/s]
Epoch 9/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.14s/it]
Epoch 9/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.14s/it]
Epoch 9: Train_Loss=0.1283, Train_Acc=0.9580 | Val_Loss=0.0935, Val_Acc=0.9873
Epoch 10/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.73it/s]
Epoch 10/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.73it/s]
Epoch 10/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.10s/it]
Epoch 10/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.10s/it]
Epoch 10: Train_Loss=0.1256, Train_Acc=0.9526 | Val_Loss=0.0920, Val_Acc=0.9747
Epoch 11/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.83it/s]
Epoch 11/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.83it/s]
Epoch 11/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.08s/it]
Epoch 11/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.08s/it]
Epoch 11: Train_Loss=0.0927, Train_Acc=0.9729 | Val_Loss=0.0975, Val_Acc=0.9747
Epoch 12/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.75it/s]
Epoch 12/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.75it/s]
Epoch 12/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.11s/it]
Epoch 12/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.11s/it]
Epoch 12: Train_Loss=0.1085, Train_Acc=0.9648 | Val_Loss=0.0852, Val_Acc=0.9747
Epoch 13/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.55it/s]
Epoch 13/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.55it/s]
Epoch 13/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:06<00:00,  1.39s/it]
Epoch 13/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:06<00:00,  1.39s/it]
Epoch 13: Train_Loss=0.1226, Train_Acc=0.9566 | Val_Loss=0.0844, Val_Acc=0.9684
Epoch 14/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:07<00:00,  3.26it/s]
Epoch 14/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:07<00:00,  3.26it/s]
Epoch 14/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:06<00:00,  1.20s/it]
Epoch 14/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:06<00:00,  1.20s/it]
Epoch 14: Train_Loss=0.1052, Train_Acc=0.9675 | Val_Loss=0.0811, Val_Acc=0.9810
Epoch 15/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:07<00:00,  3.37it/s]
Epoch 15/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:07<00:00,  3.37it/s]
Epoch 15/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:06<00:00,  1.26s/it]
Epoch 15/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:06<00:00,  1.26s/it]
Epoch 15: Train_Loss=0.1046, Train_Acc=0.9607 | Val_Loss=0.0857, Val_Acc=0.9810
Epoch 16/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:07<00:00,  3.30it/s]
Epoch 16/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:07<00:00,  3.30it/s]
Epoch 16/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.15s/it]
Epoch 16/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.15s/it]
Epoch 16: Train_Loss=0.0923, Train_Acc=0.9688 | Val_Loss=0.0788, Val_Acc=0.9810
Epoch 17/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.53it/s]
Epoch 17/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.53it/s]
Epoch 17/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.17s/it]
Epoch 17/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.17s/it]
Epoch 17: Train_Loss=0.0994, Train_Acc=0.9743 | Val_Loss=0.0727, Val_Acc=0.9873
Epoch 18/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:07<00:00,  3.39it/s]
Epoch 18/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:07<00:00,  3.39it/s]
Epoch 18/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:06<00:00,  1.23s/it]
Epoch 18/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:06<00:00,  1.23s/it]
Epoch 18: Train_Loss=0.0990, Train_Acc=0.9661 | Val_Loss=0.0869, Val_Acc=0.9873
Epoch 19/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.61it/s]
Epoch 19/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.61it/s]
Epoch 19/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:06<00:00,  1.32s/it]
Epoch 19/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:06<00:00,  1.32s/it]
Epoch 19: Train_Loss=0.1130, Train_Acc=0.9621 | Val_Loss=0.0781, Val_Acc=0.9747
Epoch 20/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.51it/s]
Epoch 20/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.51it/s]
Epoch 20/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.15s/it]
Epoch 20/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.15s/it]
Epoch 20: Train_Loss=0.1086, Train_Acc=0.9661 | Val_Loss=0.0909, Val_Acc=0.9684
Epoch 21/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.62it/s]
Epoch 21/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.62it/s]
Epoch 21/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.15s/it]
Epoch 21/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.15s/it]
Epoch 21: Train_Loss=0.0721, Train_Acc=0.9783 | Val_Loss=0.0752, Val_Acc=0.9810
Epoch 22/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.70it/s]
Epoch 22/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.70it/s]
Epoch 22/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.10s/it]
Epoch 22/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.10s/it]
Epoch 22: Train_Loss=0.0844, Train_Acc=0.9756 | Val_Loss=0.0839, Val_Acc=0.9684
Epoch 23/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.71it/s]
Epoch 23/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.71it/s]
Epoch 23/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.11s/it]
Epoch 23/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.11s/it]
Epoch 23: Train_Loss=0.1030, Train_Acc=0.9648 | Val_Loss=0.0756, Val_Acc=0.9810
Epoch 24/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.73it/s]
Epoch 24/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.73it/s]
Epoch 24/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.15s/it]
Epoch 24/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.15s/it]
Epoch 24: Train_Loss=0.0938, Train_Acc=0.9688 | Val_Loss=0.0726, Val_Acc=0.9747
Epoch 25/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.51it/s]
Epoch 25/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.51it/s]
Epoch 25/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.11s/it]
Epoch 25/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.11s/it]
Epoch 25: Train_Loss=0.0825, Train_Acc=0.9756 | Val_Loss=0.0882, Val_Acc=0.9747
Epoch 26/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.71it/s]
Epoch 26/100 [Train]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 24/24 [00:06<00:00,  3.71it/s]
Epoch 26/100 [Val]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:05<00:00,  1.10s/it]
Epoch 26: Train_Loss=0.0880, Train_Acc=0.9715 | Val_Loss=0.0860, Val_Acc=0.9747
Early stopping at epoch 26. No improvement for 20 epochs.
Best validation accuracy achieved: 0.9873
Saved best model โž” outputs/efficientnet_dfu_best.pth
Saved training history โž” outputs/training_history_efficientnet.json
--- Block 3 Complete ---

Block 4: Test Set Evaluation & Overfitting Diagnostics

In [4]:
# Block 4: Test Set Evaluation & Overfitting Diagnostics
from tabulate import tabulate

print("\n--- Block 4: Test Set Evaluation & Overfitting Diagnostics ---")

# --- Essential Variable Check ---
# Blocks 1 and 2 must already have defined these names in the notebook namespace.
for needed in ('model', 'device', 'test_loader', 'class_names'):
    if needed not in locals():
        print(f"ERROR: Variable '{needed}' is not defined for Block 4.")
        print("Please ensure Block 1 and Block 2 have been run before running Block 4.")
        exit()

# --- Load the best model weights ---
model_path = "outputs/efficientnet_dfu_best.pth" # EfficientNet checkpoint written by Block 3
if not os.path.exists(model_path):
    print(f"ERROR: Model weights '{model_path}' not found. Cannot perform test set evaluation.")
    print("Please ensure Block 3 was run successfully at least once to save the model.")
    exit()
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval() # evaluation mode for inference
print(f"Loaded best model weights from {model_path} for evaluation.")

# --- Load training history for overfitting diagnostics ---
history_path = 'outputs/training_history_efficientnet.json' # EfficientNet history from Block 3
train_losses_loaded, train_accuracies_loaded = [], []
val_losses_loaded, val_accuracies_loaded = [], []
best_val_acc_loaded = 0.0

if not os.path.exists(history_path):
    print(f"WARNING: Training history '{history_path}' not found. Overfitting plots/table will be skipped.")
else:
    try:
        with open(history_path, 'r') as f:
            history_data = json.load(f)
        # Missing keys fall back to the empty defaults above rather than raising.
        train_losses_loaded = history_data.get('train_losses', [])
        train_accuracies_loaded = history_data.get('train_accuracies', [])
        val_losses_loaded = history_data.get('val_losses', [])
        val_accuracies_loaded = history_data.get('val_accuracies', [])
        best_val_acc_loaded = history_data.get('best_val_acc', 0.0)
        print(f"Loaded training history from {history_path}")
    except Exception as e:
        print(f"WARNING: Error loading training history from '{history_path}': {e}")
        print("Overfitting plots/table will be skipped.")
# --- Function to calculate and display overfitting diagnostics ---
def analyze_overfitting(t_losses, v_losses, t_accs, v_accs, b_val_acc):
    if not t_losses or not v_losses or not t_accs or not v_accs:
        print("Cannot generate overfitting diagnostics: History data is incomplete or empty.")
        return

    epochs = range(1, len(t_losses) + 1)

    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(epochs, t_losses, 'b', label='Training Loss')
    plt.plot(epochs, v_losses, 'r', label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)

    plt.subplot(1, 2, 2)
    plt.plot(epochs, t_accs, 'b', label='Training Accuracy')
    plt.plot(epochs, v_accs, 'r', label='Validation Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid(True)

    plt.tight_layout()
    plt.show()

    best_epoch_idx = np.argmax(v_accs)
    best_epoch_num = best_epoch_idx + 1

    train_acc_at_best_val_epoch = t_accs[best_epoch_idx]
    val_acc_at_best_val_epoch = v_accs[best_epoch_idx]
    train_loss_at_best_val_epoch = t_losses[best_epoch_idx]
    val_loss_at_best_val_epoch = v_losses[best_epoch_idx]

    last_epoch_idx = len(t_accs) - 1
    last_epoch_num = last_epoch_idx + 1
    train_acc_at_last_epoch = t_accs[last_epoch_idx]
    val_acc_at_last_epoch = v_accs[last_epoch_idx]
    train_loss_at_last_epoch = t_losses[last_epoch_idx]
    val_loss_at_last_epoch = v_losses[last_epoch_idx]

    print("\nOverfitting Diagnostics Summary:")
    overfitting_table_data = [
        ["Metric", "Best Val Epoch", "Last Epoch Trained"],
        ["Epoch #", best_epoch_num, last_epoch_num],
        ["Train Loss", f"{train_loss_at_best_val_epoch:.4f}", f"{train_loss_at_last_epoch:.4f}"],
        ["Val Loss", f"{val_loss_at_best_val_epoch:.4f}", f"{val_loss_at_last_epoch:.4f}"],
        ["Train Accuracy", f"{train_acc_at_best_val_epoch:.4f}", f"{train_acc_at_last_epoch:.4f}"],
        ["Val Accuracy", f"{val_acc_at_best_val_epoch:.4f}", f"{val_acc_at_last_epoch:.4f}"],
        ["Train-Val Loss Diff", f"{abs(train_loss_at_best_val_epoch - val_loss_at_best_val_epoch):.4f}", f"{abs(train_loss_at_last_epoch - val_loss_at_last_epoch):.4f}"],
        ["Train-Val Acc Diff", f"{abs(train_acc_at_best_val_epoch - val_acc_at_best_val_epoch):.4f}", f"{abs(train_acc_at_last_epoch - val_acc_at_last_epoch):.4f}"]
    ]
    print(tabulate(overfitting_table_data, headers="firstrow", tablefmt="grid"))

    print("\nInterpretation:")
    print("  - 'Train-Val Loss Diff' and 'Train-Val Acc Diff' show the generalization gap.")
    print("  - If Val Loss starts increasing while Train Loss continues to decrease, it's a sign of overfitting.")
    print("  - If Val Accuracy plateaus or decreases while Train Accuracy continues to rise, it's overfitting.")
    print("  - A significant difference between metrics at 'Best Val Epoch' and 'Last Epoch Trained'")
    print("    indicates that early stopping was effective in preventing further overfitting.")


# --- Test Set Evaluation (Main execution) ---
# Collect sigmoid probabilities (probability of label 1 == class_names[1])
# and ground-truth labels over the whole test set.
all_probs, all_labels = [], []
with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc="[Test Set Evaluation]"):
        imgs = imgs.to(device)
        logits = model(imgs)
        probs = torch.sigmoid(logits).cpu().numpy().ravel()
        all_probs.extend(probs)
        all_labels.extend(labels.numpy())

# Threshold at 0.5; cast to int so predictions and labels share a dtype.
preds = (np.array(all_probs) >= 0.5).astype(int)

# Pin the label order so cm.ravel() ALWAYS unpacks as (tn, fp, fn, tp),
# even if one class happens to be absent from the test split (without
# `labels`, a one-class cm is 1x1 and the unpack below would crash).
cm = confusion_matrix(all_labels, preds, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()

accuracy = accuracy_score(all_labels, preds)
# NOTE(review): "sensitivity" here is the recall of label 1 (class_names[1]);
# if the clinically positive class is the ulcer class (label 0), these two
# rates should be swapped -- confirm the intended positive class.
sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
precision = precision_score(all_labels, preds, zero_division=0)
recall = recall_score(all_labels, preds, zero_division=0)
f1 = f1_score(all_labels, preds, zero_division=0)
auc = roc_auc_score(all_labels, all_probs) # AUC uses raw probabilities, not thresholded preds

metrics = {
    "Accuracy": accuracy,
    "Sensitivity (Recall for P-class)": sensitivity,
    "Specificity (TNR for N-class)": specificity,
    "Precision (PPV for P-class)": precision,
    "Recall (Same as Sensitivity)": recall,
    "F1-Score": f1,
    "AUC": auc
}

print("\nTest set metrics:")
for k, v in metrics.items():
    print(f"{k:<30}: {v:.4f}")

print("\nConfusion Matrix:")
print(cm)
print(f"TN: {tn}, FP: {fp}")
print(f"FN: {fn}, TP: {tp}")

# Visualize Confusion Matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Test Set Confusion Matrix')
plt.show()

# --- Call the overfitting analysis function ---
print("\n--- Overfitting Diagnostics ---")
if train_losses_loaded and val_losses_loaded:
    analyze_overfitting(train_losses_loaded, val_losses_loaded, train_accuracies_loaded, val_accuracies_loaded, best_val_acc_loaded)
else:
    print("Cannot perform overfitting diagnostics: Training history not available or empty.")

print("--- Block 4 Complete ---")
--- Block 4: Test Set Evaluation & Overfitting Diagnostics ---
Loaded best model weights from outputs/efficientnet_dfu_best.pth for evaluation.
Loaded training history from outputs/training_history_efficientnet.json
[Test Set Evaluation]: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:06<00:00,  1.28s/it]
Test set metrics:
Accuracy                      : 0.9937
Sensitivity (Recall for P-class): 1.0000
Specificity (TNR for N-class) : 0.9873
Precision (PPV for P-class)   : 0.9877
Recall (Same as Sensitivity)  : 1.0000
F1-Score                      : 0.9938
AUC                           : 0.9995

Confusion Matrix:
[[78  1]
 [ 0 80]]
TN: 78, FP: 1
FN: 0, TP: 80

No description has been provided for this image
--- Overfitting Diagnostics ---
No description has been provided for this image
Overfitting Diagnostics Summary:
+---------------------+------------------+----------------------+
| Metric              |   Best Val Epoch |   Last Epoch Trained |
+=====================+==================+======================+
| Epoch #             |           6      |              26      |
+---------------------+------------------+----------------------+
| Train Loss          |           0.1421 |               0.088  |
+---------------------+------------------+----------------------+
| Val Loss            |           0.1223 |               0.086  |
+---------------------+------------------+----------------------+
| Train Accuracy      |           0.9512 |               0.9715 |
+---------------------+------------------+----------------------+
| Val Accuracy        |           0.9873 |               0.9747 |
+---------------------+------------------+----------------------+
| Train-Val Loss Diff |           0.0198 |               0.002  |
+---------------------+------------------+----------------------+
| Train-Val Acc Diff  |           0.0361 |               0.0031 |
+---------------------+------------------+----------------------+

Interpretation:
  - 'Train-Val Loss Diff' and 'Train-Val Acc Diff' show the generalization gap.
  - If Val Loss starts increasing while Train Loss continues to decrease, it's a sign of overfitting.
  - If Val Accuracy plateaus or decreases while Train Accuracy continues to rise, it's overfitting.
  - A significant difference between metrics at 'Best Val Epoch' and 'Last Epoch Trained'
    indicates that early stopping was effective in preventing further overfitting.
--- Block 4 Complete ---
In [5]:
# Calibration curve using Platt scaling
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import calibration_curve
import numpy as np

def _collect_logits(loader):
    """Run the trained model over `loader`; return (logits, labels) as numpy arrays."""
    batch_logits, batch_labels = [], []
    with torch.no_grad():
        for inputs, labels in loader:
            outputs = model(inputs.to(device))
            batch_logits.append(outputs.cpu().numpy())
            batch_labels.append(labels.cpu().numpy())
    return np.concatenate(batch_logits), np.concatenate(batch_labels)

model.eval()

# Fit the Platt scaler (a 1-feature logistic regression on the raw logits)
# on the VALIDATION set. Fitting and evaluating the calibrator on the same
# test data -- as the previous version did -- leaks test labels into the
# calibrator and overstates calibration quality.
val_logits, val_labels = _collect_logits(val_loader)
log_reg = LogisticRegression()
log_reg.fit(val_logits, val_labels)

# Evaluate calibration on the held-out test set.
logits, true_labels = _collect_logits(test_loader)
prob_preds = log_reg.predict_proba(logits)[:, 1]

# calibration_curve returns (fraction_of_positives, mean_predicted_prob) per bin.
true_prob, pred_prob = calibration_curve(true_labels, prob_preds, n_bins=10)

# Plot calibration curve against the ideal diagonal.
import matplotlib.pyplot as plt
plt.figure()
plt.plot(pred_prob, true_prob, marker='o', label='Calibration curve')
plt.plot([0, 1], [0, 1], linestyle='--', label='Perfectly calibrated')
plt.xlabel('Predicted probability')
plt.ylabel('True probability')
plt.legend()
plt.title('Calibration Curve')
plt.show()
No description has been provided for this image

Block 5: Random 5 Images from DFU/TestSet with Custom Grad-CAM and Pseudo-Segmentationยถ

Inย [8]:
# Block 5: TTA-Based Custom Grad-CAM + Pseudo-Segmentation

import os, random, sys, cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
from torchvision import transforms
from skimage.filters import threshold_otsu

# --- Custom Grad-CAM ---
class GradCAM:
    """Context manager that captures a layer's forward activations and
    backward gradients via hooks, for building Grad-CAM heatmaps.

    Usage:
        with GradCAM(model, layer) as cam:
            model(x).sum().backward()
            # cam.activations / cam.gradients now hold the captured tensors
    """

    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.activations = None  # populated by the forward hook
        self.gradients = None    # populated by the backward hook

    def _save_activations(self, module, input, output):
        # Detach so the stored tensor does not keep the autograd graph alive.
        self.activations = output.detach()

    def _save_gradients(self, module, grad_input, grad_output):
        # grad_output[0] is the gradient w.r.t. the layer's output.
        self.gradients = grad_output[0].detach()

    def __enter__(self):
        # Install both hooks on entry; keep the handles so exit can undo them.
        self._handles = [
            self.target_layer.register_forward_hook(self._save_activations),
            self.target_layer.register_full_backward_hook(self._save_gradients),
        ]
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always remove the hooks so repeated use does not stack handlers.
        for handle in self._handles:
            handle.remove()

def compute_custom_grad_cam(model, input_tensor, target_layer, target_class=None):
    """Compute a [0, 1]-normalized Grad-CAM heatmap for one input.

    Args:
        model: network with a single-logit (sigmoid) output head.
        input_tensor: batched input of one sample (batch size 1 — the
            sigmoid is read with ``.item()``).
        target_layer: layer whose activations/gradients are hooked.
        target_class: 1 or 0; defaults to the model's own decision.

    Returns:
        2-D numpy array normalized to [0, 1]; all zeros if the raw map
        is (near-)constant.
    """
    with GradCAM(model, target_layer) as cam:
        model.eval()
        input_tensor.requires_grad_(True)
        output = model(input_tensor)

        # Default to the class the model itself predicts for this input.
        if target_class is None:
            target_class = 1 if torch.sigmoid(output).item() >= 0.5 else 0

        model.zero_grad()
        # Backprop +1 for the positive class, -1 for the negative class.
        grad_sign = torch.ones_like(output) if target_class == 1 else -torch.ones_like(output)
        output.backward(gradient=grad_sign, retain_graph=True)

        # Channel weights: global-average-pool the gradients over H x W,
        # then take the ReLU of the weighted activation sum.
        channel_weights = cam.gradients.mean(dim=(2, 3), keepdim=True)
        heatmap = F.relu((channel_weights * cam.activations).sum(dim=1, keepdim=True))
        heatmap = heatmap.squeeze().cpu().numpy()

        lo, hi = np.min(heatmap), np.max(heatmap)
        if hi - lo < 1e-8:
            # Flat map carries no localization signal — return all zeros.
            return np.zeros_like(heatmap)
        return (heatmap - lo) / (hi - lo + 1e-8)

def overlay_heatmap(img, heatmap, alpha=0.5):
    """Blend a [0, 1] heatmap (JET colormap) onto a uint8 image.

    Args:
        img: HxWx3 uint8 image.
        heatmap: 2-D float map in [0, 1]; resized to match ``img``.
        alpha: weight of the original image in the blend.

    Returns:
        HxWx3 uint8 blended image.
    """
    h, w = img.shape[0], img.shape[1]
    cam_resized = cv2.resize(heatmap, (w, h))
    # NOTE(review): applyColorMap returns BGR; downstream display uses
    # matplotlib (RGB), so the colormap channels appear swapped — confirm
    # whether this is intended.
    cam_colored = cv2.applyColorMap(np.uint8(255 * cam_resized), cv2.COLORMAP_JET)
    base = img.astype(np.float32) / 255.0
    overlay = cam_colored.astype(np.float32) / 255.0
    mixed = cv2.addWeighted(base, alpha, overlay, 1 - alpha, 0)
    return np.uint8(255 * mixed)

def get_pseudo_segmentation_mask(grayscale_cam, original_image_shape, threshold_method='otsu'):
    """Binarize a Grad-CAM heatmap into a crude {0, 1} uint8 mask.

    Args:
        grayscale_cam: 2-D float map in [0, 1].
        original_image_shape: passed straight to ``cv2.resize`` as dsize,
            i.e. (width, height) — callers here pass PIL's ``Image.size``,
            which matches that order.
        threshold_method: 'otsu' for an Otsu threshold, anything else
            uses a fixed 0.5 cutoff.
    """
    cam_resized = cv2.resize(grayscale_cam, original_image_shape)
    # A (near-)constant map has no meaningful threshold; return empty mask.
    if np.max(cam_resized) - np.min(cam_resized) < 1e-6:
        return np.zeros_like(cam_resized, dtype=np.uint8)
    if threshold_method == 'otsu':
        cutoff = threshold_otsu(cam_resized)
    else:
        cutoff = 0.5
    return (cam_resized >= cutoff).astype(np.uint8)

# --- TTA Transforms ---
# Three test-time-augmentation views per image:
#   1) forced horizontal flip (p=1.0, so it is deterministic),
#   2) random rotation up to +/-15 degrees,
#   3) identity (plain resize + normalize).
# All views share ImageNet normalization stats, matching the pretrained backbone.
# NOTE(review): RandomRotation draws a fresh angle on every call, so TTA
# predictions are not reproducible across runs — consider a fixed-angle
# rotation if deterministic results are needed.
tta_transforms = [
    transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.RandomHorizontalFlip(p=1.0),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ]),
    transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.RandomRotation(degrees=15),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ]),
    transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ]),
]

# --- Apply TTA for Prediction and Grad-CAM ---
def apply_tta_grad_cam_with_prediction(model, raw_image_pil, target_layer, tta_transforms):
    """Run test-time augmentation: average sigmoid scores and Grad-CAM
    maps over every TTA view of a single PIL image.

    Args:
        model: binary classifier with a single sigmoid logit (class 1).
        raw_image_pil: input PIL image.
        target_layer: layer used for Grad-CAM hooks.
        tta_transforms: list of transforms, one per TTA view.

    Returns:
        (pred_class_idx, confidence, avg_cam) where ``confidence`` is the
        averaged probability of the *predicted* class.

    FIX: previously the raw class-1 sigmoid average was returned as the
    probability, so confident class-0 predictions were reported with a
    near-zero "probability" (e.g. "Predicted: Abnormal, Probability: 0.03").
    """
    cams, class1_probs = [], []

    for tfm in tta_transforms:
        input_tensor = tfm(raw_image_pil).unsqueeze(0).to(device)

        # Prediction pass for this view (no gradients needed here).
        with torch.no_grad():
            logits = model(input_tensor)
            class1_probs.append(torch.sigmoid(logits).item())

        # Grad-CAM targeted at the class this particular view predicts.
        view_class = 1 if class1_probs[-1] >= 0.5 else 0
        cams.append(compute_custom_grad_cam(model, input_tensor, target_layer, view_class))

    avg_prob = float(np.mean(class1_probs))
    final_class_idx = 1 if avg_prob >= 0.5 else 0
    # Report the confidence of the predicted class, not always class 1.
    confidence = avg_prob if final_class_idx == 1 else 1.0 - avg_prob
    avg_cam = np.mean(cams, axis=0)

    return final_class_idx, confidence, avg_cam

# --- Start Execution ---
print("\n--- Block 5: TTA-Based Grad-CAM + Pseudo-Segmentation ---")

# Fail fast if earlier notebook cells were skipped.
# NOTE(review): at notebook top level locals() == globals(), so this check
# works here — but 'val_test_tfm' is never used in this block; stale
# requirement from an earlier version?
required_vars = ['model', 'device', 'val_test_tfm', 'class_names', 'IMG_SIZE', 'DATA_ROOT']
for var in required_vars:
    if var not in locals():
        print(f"โŒ Missing variable: {var}. Ensure previous blocks are executed.")
        sys.exit(1)

# Load model
# Restore the best checkpoint produced by the training block; without it
# the Grad-CAM visualizations would reflect untrained/last-epoch weights.
model_path = "outputs/efficientnet_dfu_best.pth"
if os.path.exists(model_path):
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    print(f"โœ… Loaded model weights from {model_path}")
else:
    print(f"โŒ Could not find model at {model_path}")
    sys.exit(1)

# Locate TestSet images
# TestSet sits next to the Patches folder referenced by DATA_ROOT.
testset_folder = os.path.abspath(os.path.join(DATA_ROOT, "../TestSet"))
if not os.path.isdir(testset_folder):
    print(f"โŒ TestSet folder not found at {testset_folder}")
    sys.exit(1)

valid_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.webp')
# Recursively collect every image file under TestSet (case-insensitive ext match).
image_files = [os.path.join(root, f) for root, _, files in os.walk(testset_folder) for f in files if f.lower().endswith(valid_exts)]
if not image_files:
    print(f"โŒ No images found in {testset_folder}")
    sys.exit(1)

# Sample up to 5 images for visualization (fewer if TestSet is small).
NUM_RANDOM_IMAGES = min(5, len(image_files))
random_image_paths = random.sample(image_files, NUM_RANDOM_IMAGES)
print(f"๐Ÿ” Selected {NUM_RANDOM_IMAGES} random image(s) from TestSet")

# --- Inference Loop ---
# For each sampled image: TTA prediction, Grad-CAM overlay, pseudo-mask,
# and a three-panel matplotlib figure.
# FIX: close each figure after showing it — generating figures in a loop
# without plt.close() accumulates them in memory.
for idx, img_path in enumerate(random_image_paths):
    print(f"\n๐Ÿ“ท Image {idx+1}/{NUM_RANDOM_IMAGES}: {os.path.basename(img_path)}")
    try:
        raw_pil = Image.open(img_path).convert("RGB")
        raw_np = np.array(raw_pil)
        original_size = raw_pil.size  # PIL size is (W, H) — matches cv2.resize's dsize order

        # TTA-based prediction and averaged Grad-CAM for this image.
        pred_class_idx, avg_prob, grayscale_cam = apply_tta_grad_cam_with_prediction(
            model, raw_pil, model.features[-1], tta_transforms
        )
        pred_class_name = class_names[pred_class_idx]

        # Binarize the averaged CAM into a crude segmentation mask.
        pseudo_mask = get_pseudo_segmentation_mask(grayscale_cam, original_size, threshold_method='otsu')

        # Estimate ulcer area as the percentage of mask pixels set.
        if pred_class_name == class_names[0]:  # Assuming 'Abnormal' is index 0
            ulcer_pixels = np.sum(pseudo_mask)
            ulcer_pct = (ulcer_pixels / (pseudo_mask.shape[0] * pseudo_mask.shape[1])) * 100
        else:
            ulcer_pct = 0.0

        # Display: original | Grad-CAM overlay | pseudo-segmentation mask.
        fig, ax = plt.subplots(1, 3, figsize=(24, 8))
        ax[0].imshow(raw_np)
        ax[0].set_title(f"Original\nPrediction: {pred_class_name} ({avg_prob:.2f})")
        ax[0].axis("off")

        cam_overlay = overlay_heatmap(raw_np, grayscale_cam)
        ax[1].imshow(cam_overlay)
        ax[1].set_title("TTA-Averaged Grad-CAM")
        ax[1].axis("off")

        if pred_class_name == class_names[0]:
            # Paint masked pixels green and alpha-blend onto the original.
            green_overlay = raw_np.astype(np.float32) / 255.0
            mask_rgb = np.zeros_like(green_overlay)
            mask_rgb[pseudo_mask == 1] = [0, 1, 0]
            blended = cv2.addWeighted(green_overlay, 0.7, mask_rgb, 0.3, 0)
            ax[2].imshow(np.uint8(255 * blended))
        else:
            ax[2].imshow(raw_np)
        ax[2].set_title("Pseudo-Segmentation Mask")
        ax[2].axis("off")

        plt.tight_layout()
        plt.show()
        plt.close(fig)  # free figure memory when looping over many images

        print(f"๐Ÿงพ Predicted: {pred_class_name}")
        print(f"๐Ÿ”ข Probability: {avg_prob:.4f}")
        if pred_class_name == class_names[0]:
            print(f"๐Ÿ“ Ulcer area estimate: {ulcer_pct:.2f}%")
        else:
            print("โœ… No ulcer area estimated (healthy prediction).")

    except Exception as e:
        # Best-effort: report the failure and continue with remaining images.
        print(f"โš ๏ธ Error processing {os.path.basename(img_path)}: {e}")

print("--- โœ… Block 5 Complete ---")
--- Block 5: TTA-Based Grad-CAM + Pseudo-Segmentation ---
โœ… Loaded model weights from outputs/efficientnet_dfu_best.pth
๐Ÿ” Selected 5 random image(s) from TestSet

๐Ÿ“ท Image 1/5: lesioni-piede-diabetico_01.jpg
No description has been provided for this image
๐Ÿงพ Predicted: Abnormal(Ulcer)
๐Ÿ”ข Probability: 0.0286
๐Ÿ“ Ulcer area estimate: 14.30%

๐Ÿ“ท Image 2/5: 17.jpg
No description has been provided for this image
๐Ÿงพ Predicted: Abnormal(Ulcer)
๐Ÿ”ข Probability: 0.0091
๐Ÿ“ Ulcer area estimate: 31.51%

๐Ÿ“ท Image 3/5: foot-doctor-for-corn-callus-treatment.png
No description has been provided for this image
๐Ÿงพ Predicted: Abnormal(Ulcer)
๐Ÿ”ข Probability: 0.3766
๐Ÿ“ Ulcer area estimate: 25.15%

๐Ÿ“ท Image 4/5: 3.jpg
No description has been provided for this image
๐Ÿงพ Predicted: Abnormal(Ulcer)
๐Ÿ”ข Probability: 0.0379
๐Ÿ“ Ulcer area estimate: 20.72%

๐Ÿ“ท Image 5/5: images (18).jpg
No description has been provided for this image
๐Ÿงพ Predicted: Abnormal(Ulcer)
๐Ÿ”ข Probability: 0.0523
๐Ÿ“ Ulcer area estimate: 16.71%
--- โœ… Block 5 Complete ---